Exploration into trying to detect climate regions using the rainfall data.
library(knitr)
source("./Scripts/Settings.R")
source("./Scripts/AusWeather.R")
source("./Scripts/AusClimate.R")
source("./Scripts/AusMaps.R")
source("./Scripts/Utility.R")
regional_valuations <- function() {
message("Evaluating Baseline Stations")
baseline_stations <- aus_baseline_stations(aus_rainfall_valuation_info)
message("Loading Observations")
observations <- aus_clean_observations(baseline_stations$Station, aus_rainfall_clean_info)
message("Evaluating Baselines")
baselines <- aus_valuation_baselines(observations, aus_rainfall_valuation_info)
message("Evaluating Valuations")
valuations <- aus_valuations(observations, aus_rainfall_valuation_info, baselines = baselines)
message("Evaluation Complete")
list(
stations = baseline_stations,
observations = observations,
baselines = baselines,
valuations = valuations
)
}
memorised_regional_valuations <- aus_memorise(regional_valuations, aus_rainfall_observations_info,
paste0("RainRegionsValuations v", aus_rainfall_version))
stations <- memorised_regional_valuations$stations
observations <- memorised_regional_valuations$observations
baselines <- memorised_regional_valuations$baselines
valuations <- memorised_regional_valuations$valuations
Plot a map of australia wide rainfall baselines for each month
baselines %>%
inner_join(stations, by = c("Zone", "Station")) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude, color = log(Baseline.Mean)), size = 0.1) +
scale_colour_distiller(type = "div", palette="RdBu", direction = 1) +
guides(colour=FALSE) +
facet_wrap(~ Minor_Period)
NA
Try using kmeans to try to detect regions with similar variation.
Plot a map of australia wide rainfall anamolies for a series of years.
valuations %>%
filter(Major_Period >= 2010, Major_Period <= 2015, Minor_Period == 1) %>%
inner_join(stations, by = c("Zone", "Station")) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude, color = Rainfall.Anamoly), size = 0.1) +
scale_colour_distiller(type = "div", palette="RdBu", direction = 1) +
guides(colour=FALSE) +
facet_wrap(~ Major_Period)
It looks like anamolies occur in distinct regions. And its quite easy to see.
Try using kmeans to try to detect regions that vary together. Also could use correlation. The corclust method in the klaR package can be used to detect clusters.
Try finding clusters using baseline rainfalls. Spread the monthly rainfalls into columns
station_rainfall_means <- baselines %>%
select(Zone, Station, Rainfall.Mean = Minor_Period, Baseline.Mean) %>%
spread(key = Rainfall.Mean, value = Baseline.Mean, sep = ".") %>%
ungroup()
station_rainfall_sds <- baselines %>%
select(Zone, Station, Rainfall.SD = Minor_Period, Baseline.SD) %>%
spread(key = Rainfall.SD, value = Baseline.SD, sep = ".") %>%
ungroup()
station_rainfall <- station_rainfall_means %>% inner_join(station_rainfall_sds, by = c("Zone", "Station"))
station_rainfall
station_centers_mean_groups <- function(centers) {
station_kmeans <- station_rainfall_means %>% select(-Zone, -Station) %>% kmeans(centers)
station_rainfall_means$Centers <- centers
station_rainfall_means$Group <- station_kmeans$cluster
station_rainfall_means
}
centers <- 4:20
station_rainfall_mean_by_centers <- centers %>% map_dfr(station_centers_mean_groups)
for (center in centers) {
p <- station_rainfall_mean_by_centers %>%
inner_join(stations, by = c("Zone", "Station")) %>%
filter(Centers == center) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude, color = Group)) +
scale_colour_distiller(type = "div", palette="RdBu") +
guides(colour=FALSE) +
ggtitle(paste0("Centers = ", center))
print(p)
}
station_centers_sd_groups <- function(centers) {
station_kmeans <- station_rainfall_sds %>% select(-Zone, -Station) %>% kmeans(centers)
station_rainfall_sds$Centers <- centers
station_rainfall_sds$Group <- station_kmeans$cluster
station_rainfall_sds
}
centers <- 4:20
station_rainfall_sd_by_centers <- centers %>% map_dfr(station_centers_sd_groups)
did not converge in 10 iterations
for (center in centers) {
p <- station_rainfall_sd_by_centers %>%
inner_join(stations, by = c("Zone", "Station")) %>%
filter(Centers == center) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude, color = Group)) +
scale_colour_distiller(type = "div", palette="RdBu") +
guides(colour=FALSE)
print(p)
}
SDs alone it not so distinct and the algor has trouble. Use just the means so their is no issue in getting values on the same scale.
Finds group with similar rainfall distributions. Picks out the arid regions, coast regions and so one.
However tends to do things like group areas on the east and west coast of Aus which I know tend to vary seperately.
10 Centers looks about nice. It picks out the areas that are in drought right now.
Save as station_regional_clusters
station_regional_clusters <- station_rainfall_means %>%
select(-Zone, -Station) %>%
kmeans(10)
station_regions <- station_rainfall_means
station_regions$RegionID <- station_regional_clusters$cluster
station_regions <- station_regions %>% select(Zone, Station, RegionID, everything())
stations %>%
inner_join(station_regions, by = c("Zone", "Station")) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude, color = RegionID)) +
scale_colour_distiller(type = "div", palette="RdBu")
Generate anamoly grouped by regions
plot(valuations %>%
inner_join(stations, by = c("Zone", "Station")) %>%
inner_join(station_regions, by = c("Zone", "Station")) %>%
filter(Major_Period > 1900) %>%
group_by(RegionID, Median_Date) %>%
summarise(Rainfall.Anamoly = mean(Rainfall.Anamoly)) %>%
ggplot(aes(x = Median_Date, y = Rainfall.Anamoly)) +
geom_line(color="lightgrey") +
geom_smooth(method = "loess", formula = y ~ x, span = .1) +
facet_wrap(~ RegionID) + ylim(-1, 1))
for (regionID in 1:10) {
gridExtra::grid.arrange(
stations %>%
inner_join(station_regions, by = c("Zone", "Station")) %>%
filter(RegionID == regionID) %>%
ggplot() %>%
aus_dark_map() +
geom_point(aes(x = Longitude, y = Latitude)) +
guides(colour=FALSE),
valuations %>%
inner_join(stations, by = c("Zone", "Station")) %>%
inner_join(station_regions, by = c("Zone", "Station")) %>%
filter(RegionID == regionID) %>%
filter(Major_Period > 1900) %>%
group_by(Median_Date) %>%
summarise(Rainfall.Anamoly = mean(Rainfall.Anamoly)) %>%
ggplot(aes(x = Median_Date, y = Rainfall.Anamoly)) +
geom_line(color="grey") +
geom_smooth(method = "loess", formula = y ~ x, span = .2) +
ylim(-1, 1),
ncol = 2
)
}
It sorta works. You can see the major drought in eastern australia. However some areas are associated that probably shouldn’t be.
Cluster stations that tend to vary together.
Try kmeans to start cos its easy!
Try spreading all periods during the baseline period into one set and use kmeans to group stations.